#!/usr/bin/env python
# encoding: utf-8
import re
import time

from console_pipeline import ConsolePipeline
from request import Request
from spider import Spider


class XVideoProcess(object):
    """Page processor that scrapes a video listing and its detail pages.

    ``process`` parses a listing page and yields one ``Request`` per video
    entry found, each with ``process_detail`` as its callback.
    ``process_detail`` parses a detail page and yields the high-quality
    stream URL embedded in it.
    """

    # Not referenced inside this class; presumably read by the Spider that
    # drives this processor -- confirm against the spider implementation.
    start_url = "https://www.xvideos.com"
    headers = None

    # Patterns are compiled once at class creation rather than on every call.
    # Listing entry groups, in order: (detail href, thumbnail URL, title).
    _LISTING_PATTERN = re.compile(
        r'<div.*?thumb-block.*?thumb"><a href="(.*?)"><img.*?data-src="(.*?)".*?</div><p><a .*?title="(.*?)">.*?</div>',
        re.S)
    _VIDEO_URL_PATTERN = re.compile(r"html5player.setVideoUrlHigh\('(.*?)'\)")
    _TITLE_PATTERN = re.compile(r"<title>(.*?)</title>", re.S)

    def process(self, html):
        """Yield a detail-page ``Request`` for every video on a listing page.

        :param html: markup of the listing page, as text.
        :return: generator of ``Request`` objects whose callback is
            ``process_detail``; yields nothing when no entry matches.
        """
        for link, _thumbnail, title in self._LISTING_PATTERN.findall(html):
            # Pause before emitting each request (kept from the original;
            # presumably to throttle the crawl).
            time.sleep(2)
            # The title is the third capture group and the link the first;
            # the thumbnail URL (second group) is not used.
            print("title:%s, link:%s" % (title, link))
            # The captured href is prefixed with the site root to build the
            # detail-page URL.
            url = 'https://www.xvideos.com' + link
            yield Request(url=url, callback=self.process_detail)

    def process_detail(self, html):
        """Yield the high-quality video URL found on a detail page.

        :param html: markup of the detail page, as text.
        :return: generator yielding the first URL passed to
            ``html5player.setVideoUrlHigh(...)``; yields nothing when the
            page contains no such call.
        """
        video_urls = self._VIDEO_URL_PATTERN.findall(html)
        if not video_urls:
            # Nothing to extract: report it instead of raising IndexError
            # from inside the generator.
            print('detail: no high quality video url found')
            return
        titles = self._TITLE_PATTERN.findall(html)
        # A missing <title> must not prevent the video URL from being emitted.
        title = titles[0] if titles else ''
        print('detail:', video_urls[0] + ', ' + title)
        yield video_urls[0]


if __name__ == "__main__":
    # Build the spider around the processor, attach console output, and run.
    spider = Spider(XVideoProcess())
    # start() is invoked on whatever addPipeline() returns, as in a chained call.
    runner = spider.addPipeline(ConsolePipeline())
    runner.start()
